사이트에서 URL 뽑아내기 :: 자바네트워크I/O[SSISO Community]
 
SSISO 카페 SSISO Source SSISO 구직 SSISO 쇼핑몰 SSISO 맛집
추천검색어 : JUnit   Log4j   ajax   spring   struts   struts-config.xml   Synchronized   책정보   Ajax 마스터하기   우측부분

자바네트워크I/O
[1]
등록일:2008-03-11 19:48:54 (0%)
작성자:
제목:사이트에서 URL 뽑아내기
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;
import javax.swing.text.EditorKit;
import javax.swing.text.ElementIterator;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Downloads one HTML page, parses it with Swing's {@link HTMLEditorKit} and
 * prints every hyperlink ({@code <a href="...">}) found in it as an absolute URL.
 *
 * <p>Usage: {@code java EnumerateURLLink [pageUrl]}. When no argument is given
 * the page at {@link #DEFAULT_URL} is scanned.
 */
public class EnumerateURLLink
{
      /** Page scanned when no URL is passed on the command line. */
      private static final String DEFAULT_URL = "http://www.naver.com";

      /**
       * Fetches the page, parses it and prints one line per usable link.
       *
       * @param args optional; {@code args[0]} is the URL of the page to scan
       */
      public static void main(String[] args)
      {
            String target = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

            // Static, JVM-wide switch: HTTP 3xx responses are not followed automatically.
            HttpURLConnection.setFollowRedirects(false);

            EditorKit kit = new HTMLEditorKit();
            Document doc = kit.createDefaultDocument();

            // Tell the parser to ignore charset directives inside the HTML;
            // the bytes are decoded by the Reader created below.
            doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

            try
            {
                  URL pageUrl = new URL(target);
                  URLConnection conn = pageUrl.openConnection();
                  Charset charset = charsetFromContentType(conn.getContentType());

                  // Parse the HTML; the reader (and the underlying stream) is
                  // closed as soon as parsing finishes or fails.
                  try (Reader reader = new InputStreamReader(conn.getInputStream(), charset))
                  {
                        kit.read(reader, doc, 0);
                  }

                  // Walk every element of the parsed document looking for anchors.
                  ElementIterator it = new ElementIterator(doc);
                  javax.swing.text.Element elem;
                  while ((elem = it.next()) != null)
                  {
                        Object anchor = elem.getAttributes().getAttribute(HTML.Tag.A);
                        if (!(anchor instanceof AttributeSet))
                        {
                              continue;
                        }

                        // Anchors such as <a name="..."> carry no HREF at all.
                        Object href = ((AttributeSet) anchor).getAttribute(HTML.Attribute.HREF);
                        String link = resolveLink(pageUrl, (href instanceof String) ? (String) href : null);
                        if (link != null)
                        {
                              System.out.println("URL  Link  =  " + link);
                        }
                  }
            }
            catch (IOException | BadLocationException e)
            {
                  System.err.println("Failed to enumerate links from " + target + ": " + e);
            }
      }

      /**
       * Turns the raw value of an {@code href} attribute into an absolute URL.
       *
       * <p>Package-private (rather than private) so it can be exercised without
       * network access.
       *
       * @param base the URL of the page the link was found on; used to resolve
       *             relative references
       * @param href the raw attribute value; may be {@code null}
       * @return the absolute URL as a string, or {@code null} when the link
       *         should be skipped: missing or blank {@code href}, a
       *         {@code mailto:} or {@code javascript:} link, or a reference
       *         that {@link URL} cannot resolve (for example an unsupported
       *         scheme such as {@code tel:})
       */
      static String resolveLink(URL base, String href)
      {
            if (href == null)
            {
                  return null;
            }
            String spec = href.trim();
            if (spec.isEmpty())
            {
                  return null;
            }

            // Only the scheme decides whether a link is skipped, so a path that
            // merely contains the word "javascript" is still reported.
            if (hasScheme(spec, "mailto:") || hasScheme(spec, "javascript:"))
            {
                  return null;
            }

            try
            {
                  // URL(context, spec) handles absolute URLs, "//host/path",
                  // "/rooted", "relative" and "../" references, and keeps the
                  // scheme of the base page instead of assuming http.
                  return new URL(base, spec).toString();
            }
            catch (MalformedURLException e)
            {
                  return null;
            }
      }

      /**
       * Picks the charset to decode a response with from its
       * {@code Content-Type} header value.
       *
       * @param contentType header value such as {@code "text/html; charset=EUC-KR"};
       *                    may be {@code null}
       * @return the charset named by the {@code charset} parameter, or UTF-8
       *         when the header is missing, has no such parameter, or names a
       *         charset this JVM does not know
       */
      static Charset charsetFromContentType(String contentType)
      {
            if (contentType != null)
            {
                  String[] parts = contentType.split(";");
                  // parts[0] is the media type itself; parameters follow it.
                  for (int i = 1; i < parts.length; i++)
                  {
                        String param = parts[i].trim();
                        if (!hasScheme(param, "charset="))
                        {
                              continue;
                        }
                        String name = param.substring("charset=".length()).trim();
                        if (name.length() >= 2 && name.charAt(0) == '"'
                                    && name.charAt(name.length() - 1) == '"')
                        {
                              name = name.substring(1, name.length() - 1);
                        }
                        try
                        {
                              return Charset.forName(name);
                        }
                        catch (IllegalArgumentException e)
                        {
                              // Illegal or unsupported charset name: use the default.
                              break;
                        }
                  }
            }
            return StandardCharsets.UTF_8;
      }

      /** Case-insensitive {@code startsWith}. */
      private static boolean hasScheme(String text, String prefix)
      {
            return text.regionMatches(true, 0, prefix, 0, prefix.length());
      }
}
[본문링크] 사이트에서 URL 뽑아내기
[1]
코멘트 (이 글의 트랙백 주소: /cafe/tb_receive.php?no=2507)
작성자
비밀번호

 

SSISOCommunity

[이전]

Copyright ⓒ 2005, SSISO Community. All Rights Reserved.